library(ggplot2)
library(gridExtra)
library(grid)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Read the flight records and the airport table from the data directory,
# then print a per-column summary of the flight records.
delay <- read.csv(file = 'data/ABIA.csv')
airport <- read.csv(file = 'data/airport.csv')
summary(object = delay)
## Year Month DayofMonth DayOfWeek
## Min. :2008 Min. : 1.00 Min. : 1.00 Min. :1.000
## 1st Qu.:2008 1st Qu.: 3.00 1st Qu.: 8.00 1st Qu.:2.000
## Median :2008 Median : 6.00 Median :16.00 Median :4.000
## Mean :2008 Mean : 6.29 Mean :15.73 Mean :3.902
## 3rd Qu.:2008 3rd Qu.: 9.00 3rd Qu.:23.00 3rd Qu.:6.000
## Max. :2008 Max. :12.00 Max. :31.00 Max. :7.000
##
## DepTime CRSDepTime ArrTime CRSArrTime
## Min. : 1 Min. : 55 Min. : 1 Min. : 5
## 1st Qu.: 917 1st Qu.: 915 1st Qu.:1107 1st Qu.:1115
## Median :1329 Median :1320 Median :1531 Median :1535
## Mean :1329 Mean :1320 Mean :1487 Mean :1505
## 3rd Qu.:1728 3rd Qu.:1720 3rd Qu.:1903 3rd Qu.:1902
## Max. :2400 Max. :2346 Max. :2400 Max. :2400
## NA's :1413 NA's :1567
## UniqueCarrier FlightNum TailNum ActualElapsedTime
## WN :34876 Min. : 1 : 1104 Min. : 22.0
## AA :19995 1st Qu.: 640 N678CA : 195 1st Qu.: 57.0
## CO : 9230 Median :1465 N511SW : 180 Median :125.0
## YV : 4994 Mean :1917 N526SW : 176 Mean :120.2
## B6 : 4798 3rd Qu.:2653 N528SW : 172 3rd Qu.:164.0
## XE : 4618 Max. :9741 N520SW : 168 Max. :506.0
## (Other):20749 (Other):97265 NA's :1601
## CRSElapsedTime AirTime ArrDelay DepDelay
## Min. : 17.0 Min. : 3.00 Min. :-129.000 Min. :-42.000
## 1st Qu.: 58.0 1st Qu.: 38.00 1st Qu.: -9.000 1st Qu.: -4.000
## Median :130.0 Median :105.00 Median : -2.000 Median : 0.000
## Mean :122.1 Mean : 99.81 Mean : 7.065 Mean : 9.171
## 3rd Qu.:165.0 3rd Qu.:142.00 3rd Qu.: 10.000 3rd Qu.: 8.000
## Max. :320.0 Max. :402.00 Max. : 948.000 Max. :875.000
## NA's :11 NA's :1601 NA's :1601 NA's :1413
## Origin Dest Distance TaxiIn
## AUS :49623 AUS :49637 Min. : 66 Min. : 0.000
## DAL : 5583 DAL : 5573 1st Qu.: 190 1st Qu.: 4.000
## DFW : 5508 DFW : 5506 Median : 775 Median : 5.000
## IAH : 3704 IAH : 3691 Mean : 705 Mean : 6.413
## PHX : 2786 PHX : 2783 3rd Qu.:1085 3rd Qu.: 7.000
## DEN : 2719 DEN : 2673 Max. :1770 Max. :143.000
## (Other):29337 (Other):29397 NA's :1567
## TaxiOut Cancelled CancellationCode Diverted
## Min. : 1.00 Min. :0.00000 :97840 Min. :0.000000
## 1st Qu.: 9.00 1st Qu.:0.00000 A: 719 1st Qu.:0.000000
## Median : 12.00 Median :0.00000 B: 605 Median :0.000000
## Mean : 13.96 Mean :0.01431 C: 96 Mean :0.001824
## 3rd Qu.: 16.00 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :305.00 Max. :1.00000 Max. :1.000000
## NA's :1419
## CarrierDelay WeatherDelay NASDelay SecurityDelay
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00 1st Qu.: 0.00
## Median : 0.00 Median : 0.00 Median : 2.00 Median : 0.00
## Mean : 15.39 Mean : 2.24 Mean : 12.47 Mean : 0.07
## 3rd Qu.: 16.00 3rd Qu.: 0.00 3rd Qu.: 16.00 3rd Qu.: 0.00
## Max. :875.00 Max. :412.00 Max. :367.00 Max. :199.00
## NA's :79513 NA's :79513 NA's :79513 NA's :79513
## LateAircraftDelay
## Min. : 0.00
## 1st Qu.: 0.00
## Median : 6.00
## Mean : 22.97
## 3rd Qu.: 30.00
## Max. :458.00
## NA's :79513
# Build `departure`: flights leaving AUS with a positive departure delay,
# enriched with per-group average delays, per-group row counts and the
# matching columns from `airport`.
#
# NOTE: the former `attach(delay)` call was removed. Every column below is
# accessed explicitly through `delay$` / `departure$`, and attaching put a
# stale copy of the raw columns on the search path where it could silently
# satisfy a mistyped column name in later plotting code.

# Scheduled departure hour as a two-digit string factor, parsed from the
# zero-padded CRSDepTime value.
crs_dep_hhmm <- str_pad(delay$CRSDepTime, 4, side = 'left', pad = '0')
delay$CRSDepHr <- as.factor(format(strptime(crs_dep_hhmm, '%H%M'), '%H'))

# Keep AUS departures with a known, strictly positive departure delay.
departure <- delay[delay$Origin == 'AUS', ]
departure <- departure[!is.na(departure$DepDelay), ]
departure <- departure[departure$DepDelay > 0, ]

# Calendar fields are used as discrete axes / grouping keys in the plots.
for (calendar_col in c('Month', 'DayofMonth', 'DayOfWeek')) {
  departure[[calendar_col]] <- as.factor(departure[[calendar_col]])
}

# Mean departure delay within each group (ave() defaults to FUN = mean and
# returns one value per row, so the result lines up with `departure`).
departure$HrWkAvg <- ave(
  departure$DepDelay, departure$DayOfWeek, departure$CRSDepHr
)
departure$DoMAvg <- ave(
  departure$DepDelay, departure$Month, departure$DayofMonth
)
departure$DestAvg <- ave(departure$DepDelay, departure$Dest)
departure$CarrierDAvg <- ave(
  departure$DepDelay, departure$DayOfWeek, departure$UniqueCarrier
)

# Number of rows within each group. ave() writes the counts back into a
# character vector, hence the as.numeric() conversion.
departure$HrCnt <- as.numeric(ave(
  as.character(departure$CRSDepHr),
  as.character(departure$DayOfWeek),
  as.character(departure$CRSDepHr),
  FUN = length
))
departure$DoMCnt <- as.numeric(ave(
  as.character(departure$DayofMonth),
  as.character(departure$Month),
  as.character(departure$DayofMonth),
  FUN = length
))
departure$DestCnt <- as.numeric(ave(
  as.character(departure$Dest),
  as.character(departure$Dest),
  FUN = length
))
# NOTE(review): this count is grouped by Month x carrier, whereas
# CarrierDAvg above is grouped by DayOfWeek x carrier - confirm intended.
departure$CarrierMCnt <- as.numeric(ave(
  as.character(departure$UniqueCarrier),
  as.character(departure$Month),
  as.character(departure$UniqueCarrier),
  FUN = length
))

# Attach the airport columns for each destination. merge() performs an
# inner join by default, so flights whose Dest has no row in `airport`
# are dropped here.
departure <- merge(departure, airport, by.x = 'Dest', by.y = 'airport')
#Week
# Average delay by scheduled departure hour, one line per day of the week,
# drawn separately for Monday-Thursday and Friday-Sunday and shown side by
# side.
plot_hourly_delay <- function(flights, title) {
  ggplot(
    flights,
    aes(x = CRSDepHr, y = HrWkAvg, group = DayOfWeek, color = DayOfWeek)
  ) +
    geom_point() +
    geom_line(size = 1) +
    ylim(0, 120) +
    xlab('Time of Day') +
    ylab('Average delay in minutes') +
    ggtitle(title)
}

MtoTH <- subset(departure, DayOfWeek %in% 1:4)
FtoS <- subset(departure, DayOfWeek %in% 5:7)
week1 <- plot_hourly_delay(MtoTH, 'Flight delay in Monday to Thursday')
week2 <- plot_hourly_delay(FtoS, 'Flight delay Friday to Sunday')
grid.arrange(week1, week2, ncol = 2)

#Season and Month
# Average delay by day of month, one line per month, with the year split
# into four three-month "seasons" arranged on a 2 x 2 grid.

# Build the day-of-month delay plot for one season's flights.
plot_daily_delay <- function(flights, title) {
  ggplot(
    flights,
    aes(x = DayofMonth, y = DoMAvg, group = Month, color = Month)
  ) +
    geom_point() +
    geom_line(size = 1) +
    ylim(0, 100) +
    xlab('Day of Month') +
    ylab('Average delay in minutes') +
    ggtitle(title)
}

season1 <- subset(departure, Month %in% 1:3)
season2 <- subset(departure, Month %in% 4:6)
season3 <- subset(departure, Month %in% 7:9)
season4 <- subset(departure, Month %in% 10:12)

# Titles for seasons 1 and 2 previously read 'seaon' (typo).
s1 <- plot_daily_delay(season1, 'Flight delay in season 1')
s2 <- plot_daily_delay(season2, 'Flight delay in season 2')
s3 <- plot_daily_delay(season3, 'Flight delay in season 3')
s4 <- plot_daily_delay(season4, 'Flight delay in season 4')
grid.arrange(s1, s2, s3, s4, nrow = 2, ncol = 2)

#Airline
# Average delay by day of week, one line per carrier, split into two
# hand-picked carrier groups and shown side by side.

# Build the day-of-week delay plot for one group of carriers.
plot_carrier_delay <- function(flights, title) {
  ggplot(
    flights,
    aes(
      x = DayOfWeek, y = CarrierDAvg,
      group = UniqueCarrier, color = UniqueCarrier
    )
  ) +
    geom_point() +
    geom_line(size = 1) +
    ylim(0, 120) +
    # The x axis is DayOfWeek for both panels; the second panel used to be
    # mislabelled 'Time of Day'.
    xlab('Day of Week') +
    ylab('Average delay in minutes') +
    ggtitle(title)
}

airlinelarge <- c('WN', 'AA', 'CO', 'YV', 'B6', 'XE', 'OO', 'OH')
airlinesmall <- c('MQ', '9E', 'DL', 'F9', 'UA', 'US', 'EV', 'NW')
airlarge <- departure[departure$UniqueCarrier %in% airlinelarge, ]
airsmall <- departure[departure$UniqueCarrier %in% airlinesmall, ]

air1 <- plot_carrier_delay(airlarge, 'Flight delay of Major Airlines')
air2 <- plot_carrier_delay(airsmall, 'Flight delay of Minor Airlines')
grid.arrange(air1, air2, ncol = 2)

#Dest
# Per-destination delay plots, one plot per group of destination airports.
# The groups look like US census regions/divisions (Northeast, Midwest,
# South Atlantic, East/West South Central, Mountain, Pacific) - the plot
# titles below assume that reading.
#
# The plots are only built here; nothing in this section draws them.

# Build the per-destination delay plot for one group of destinations.
# NOTE(review): DestAvg is computed per destination only, so every line is
# flat across Month. If a monthly trend is wanted, y needs an average
# grouped by Dest and Month instead - confirm the intent.
plot_region_delay <- function(flights, title) {
  ggplot(
    flights,
    aes(x = Month, y = DestAvg, group = Dest, color = Dest)
  ) +
    geom_point() +
    geom_line(size = 0.5) +
    ylim(0, 120) +
    # x is Month; this label used to be the copy-pasted 'Time of Day'.
    xlab('Month') +
    ylab('Average delay in minutes') +
    ggtitle(title)
}

NE <- c('BOS', 'EWR', 'JFK', 'PHL')
MW <- c('MDW', 'ORD', 'IND', 'DSM', 'MSP', 'MCI', 'STL', 'CLE', 'CVG', 'DTW')
SA <- c('FLL', 'JAX', 'MCO', 'TPA', 'ATL', 'BWI', 'CLT', 'RDU', 'IAD', 'ORF')
ESC <- c('BNA', 'MEM')
WSC <- c(
  'MSY', 'OKC', 'TUL', 'DAL', 'DFW', 'ELP', 'HOU', 'HRL', 'IAH', 'LBB', 'MAF'
)
M <- c('PHX', 'TUS', 'DEN', 'LAS', 'ABQ', 'SLC')
P <- c('LAX', 'LGB', 'OAK', 'ONT', 'SAN', 'SFO', 'SJC', 'SNA', 'SEA')

NEdep <- departure[departure$Dest %in% NE, ]
MWdep <- departure[departure$Dest %in% MW, ]
SAdep <- departure[departure$Dest %in% SA, ]
ESCdep <- departure[departure$Dest %in% ESC, ]
WSCdep <- departure[departure$Dest %in% WSC, ]
Mdep <- departure[departure$Dest %in% M, ]
Pdep <- departure[departure$Dest %in% P, ]

# Every title used to be the copy-pasted 'Flight delay Friday to Sunday'.
NEp <- plot_region_delay(NEdep, 'Flight delay to Northeast destinations')
MWp <- plot_region_delay(MWdep, 'Flight delay to Midwest destinations')
SAp <- plot_region_delay(SAdep, 'Flight delay to South Atlantic destinations')
ESCp <- plot_region_delay(
  ESCdep, 'Flight delay to East South Central destinations'
)
WSCp <- plot_region_delay(
  WSCdep, 'Flight delay to West South Central destinations'
)
Mp <- plot_region_delay(Mdep, 'Flight delay to Mountain destinations')
Pp <- plot_region_delay(Pdep, 'Flight delay to Pacific destinations')
#destination
# Interactive US map with one marker per destination airport, sized and
# coloured by that destination's average departure delay.

# One row per destination. The stray `departure =` that used to precede
# this statement chained into it and overwrote `departure` with this
# five-column subset; `departure` is now left intact.
dest <- unique(departure[, c('Dest', 'DestAvg', 'DestCnt', 'lon', 'lat')])

# Geo layout options passed to plotly's layout(geo = ...).
g <- list(
  scope = 'usa',
  projection = list(type = 'albers usa'),
  showland = TRUE,
  landcolor = toRGB("gray85"),
  subunitwidth = 1,
  countrywidth = 1,
  subunitcolor = toRGB("white"),
  countrycolor = toRGB("white")
)

p <- plot_geo(
  dest,
  locationmode = 'USA-states', sizes = c(1, 250), text = ~Dest
) %>%
  # "top" is not one of plotly's textposition values; "top center" places
  # the airport code above its marker.
  add_text(x = ~lon, y = ~lat, textposition = "top center") %>%
  add_markers(
    x = ~lon, y = ~lat, size = ~DestAvg, color = ~DestAvg, hoverinfo = "text",
    # Columns are referenced by bare name so the formula is evaluated
    # against the plot's own data rather than the global `dest`.
    text = ~paste(Dest, "<br />", "Average Delay", DestAvg)
  ) %>%
  layout(
    title = '2008 Austin Departure Flights Average Delay by Destination',
    geo = g
  )

# Build the figure and remove the hoverinfo entry from every trace.
# NOTE(review): this discards the hoverinfo = "text" set on the markers
# above - confirm that is still wanted.
Map_1 <- plotly_build(p)$x
for (i in seq_along(Map_1$data)) {
  Map_1$data[[i]]$hoverinfo <- NULL
}
Map_1 <- as_widget(Map_1)
Map_1